# -*- coding: utf-8 -*-


"""
Created on Sun Feb  4 18:39:12 2024 

@author: Nicholas Drachman
"""

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
    



def pHRatioAnalysis(path_lowpH, path_highpH, boundary_values, pHs):

    BVs = boundary_values
    
    data_lowpH = pd.read_csv(path_lowpH, header = None)
    data_highpH = pd.read_csv(path_highpH, header = None)


    mz_lowpH = np.array(data_lowpH[0])
    mz_highpH = np.array(data_highpH[0])

    Amp_lowpH = np.array(data_lowpH[1])/204800
    Amp_highpH = np.array(data_highpH[1])/204800


    N = 40
    Amp_smooth_lowpH = np.convolve(Amp_lowpH,np.ones(N)/N, mode = 'same')
    Amp_smooth_highpH = np.convolve(Amp_highpH,np.ones(N)/N, mode = 'same')

    # Sum Na peaks
    Na_lowpH = np.sum(Amp_smooth_lowpH[(mz_lowpH < BVs[0]) & (mz_lowpH > BVs[1])])
    Na_highpH = np.sum(Amp_smooth_highpH[(mz_highpH < BVs[0]) & (mz_lowpH > BVs[1])])

    # Sum Arg peaks
    His_lowpH = np.sum(Amp_smooth_lowpH[(mz_highpH > BVs[0]) & (mz_lowpH < BVs[2])])
    His_highpH = np.sum(Amp_smooth_highpH[(mz_highpH > BVs[0]) & (mz_lowpH < BVs[2])])

    ratio_lowpH = His_lowpH/Na_lowpH
    ratio_highpH = His_highpH/Na_highpH
    
    I_ratio = np.sum(Amp_highpH)/np.sum(Amp_lowpH)
    print(np.sum(Amp_highpH))
    print(np.sum(Amp_lowpH))
    

    pattern = r"Scans (\d+)-(\d+)"
    
    # Search the file name for the pattern and extract X and Y
    match_low = re.search(pattern, path_lowpH)
    match_high = re.search(pattern, path_highpH)
    
    if match_low:
        start_scan, end_scan = int(match_low.group(1)), int(match_low.group(2))
        Nscans_low = end_scan - start_scan
    else:
        Nscans_low = None
    
    if match_high:
        start_scan, end_scan = int(match_high.group(1)), int(match_high.group(2))
        Nscans_high = end_scan - start_scan
    else:
        Nscans_high = None

    # print(Nscans_low, Nscans_high)

    pattern = r"/(\d+)/"
    
    # Search the file name for the pattern and extract the number
    match = re.search(pattern, path_highpH)
    
    expNumber = match.group(1) if match else None
    

    fig, ax = plt.subplots(figsize = [3, 2.5])

    ax.plot(mz_lowpH,Amp_smooth_lowpH*80, color = 'darkorange', linestyle = '-', linewidth = 1, label = 'pH {}'.format(pHs[0]))
    ax.plot(mz_highpH,Amp_smooth_highpH*80, color = 'darkblue', linestyle = '--', linewidth = 1, label = 'pH {}'.format(pHs[1]))
    ax.set_xlim(BVs[1], BVs[2])
    ax.set_ylim(0,)
    ax.set_title(expNumber)
    ax.set_xlabel('m/z', fontsize = 8)
    ax.set_ylabel('intensity (# detections per scan)', fontsize = 8)
    ax.legend(fontsize = 8)
    ax.tick_params(direction = 'in', labelsize = 8)

    plt.tight_layout()
    
    # plt.savefig('/Users/ndrach1/Documents/Brown/Research/Manuscripts/Amino Acid Paper/Figures/Amino acid spectra vs pH/Histidine/his spec low and high pH.svg', format = 'svg')
    
    return ratio_lowpH, ratio_highpH, I_ratio

''' Histidine '''

His_pHs = np.array([5.49, 8.52])


# His exp 1

path_lowpH = 'Fig 5 and 7/Histidine/1/pH 5_49 (Scans 10000-12000, exp 15).csv'
path_highpH = 'Fig 5 and 7/Histidine/1/pH 8_52 (Scans 16000-18000, exp 15).csv'

rl1, rh1, Ir_His_1 = pHRatioAnalysis(path_lowpH, path_highpH, [150, 50, 450], His_pHs)

# His exp 2

path_lowpH = 'Fig 5 and 7/Histidine/2/pH 5_49 (Scans 16000-19500, exp 16).csv'
path_highpH = 'Fig 5 and 7/Histidine/2/pH 8_52 (Scans 100-4000, exp 16).csv'

rl2, rh2, Ir_His_2 = pHRatioAnalysis(path_lowpH, path_highpH, [150, 50, 450], His_pHs)


# His exp 3

path_lowpH = 'Fig 5 and 7/Histidine/3/pH 5_49 (Scans 100-5000, exp 17).csv'
path_highpH = 'Fig 5 and 7/Histidine/3/pH 8_52 (Scans 9100-12000, exp 17).csv'

rl3, rh3, Ir_His_3 = pHRatioAnalysis(path_lowpH, path_highpH, [150, 50, 450], His_pHs)


# His exp 4

path_lowpH = 'Fig 5 and 7/Histidine/4/pH 5_49 (Scans 100-7500, exp 19).csv'
path_highpH = 'Fig 5 and 7/Histidine/4/pH 8_52 (Scans 9700-11500, exp 19).csv'

rl4, rh4, Ir_His_4 = pHRatioAnalysis(path_lowpH, path_highpH, [150, 50, 500], His_pHs)


# His exp 5

path_lowpH = 'Fig 5 and 7/Histidine/5/pH 5_49 (Scans 8000-20000, exp 20).csv'
path_highpH = 'Fig 5 and 7/Histidine/5/pH 8_52 (Scans 100-4000, exp 20).csv'

rl5, rh5, Ir_His_5 = pHRatioAnalysis(path_lowpH, path_highpH, [150, 50, 450] , His_pHs)


# His exp 6

path_lowpH = 'Fig 5 and 7/Histidine/6/pH 5_49 (Scans 8000-12500, exp 22).csv'
path_highpH = 'Fig 5 and 7/Histidine/6/pH 8_52 (Scans 14000-18000, exp 22).csv'

rl6, rh6 , Ir_His_6 = pHRatioAnalysis(path_lowpH, path_highpH, [150, 50, 450] , His_pHs)



# His exp 7

path_lowpH = 'Fig 5 and 7/Histidine/7/pH 5_49 (Scans 9000-12500, exp 28).csv'
path_highpH = 'Fig 5 and 7/Histidine/7/pH 8_52 (Scans 15000-18000, exp 28).csv'

rl7, rh7 , Ir_His_7 = pHRatioAnalysis(path_lowpH, path_highpH, [150, 50, 350] , His_pHs)


# His exp 8

path_lowpH = 'Fig 5 and 7/Histidine/8/pH 5_49 (Scans 7000-11000, exp 29).csv'
path_highpH = 'Fig 5 and 7/Histidine/8/pH 8_52 (Scans 100-3000, exp 29).csv'

rl8, rh8 , Ir_His_8 = pHRatioAnalysis(path_lowpH, path_highpH, [150, 50, 450] , His_pHs)


HisRatios = np.array([[rl1,rl2,rl3,rl4,rl5,rl6,rl7,rl8],[rh1,rh2,rh3,rh4,rh5,rh6,rh7,rh8]])

# Exclude exp 2 since voltage also changed between pHs
Ir_His_atios = np.array([Ir_His_1, Ir_His_3, Ir_His_4, Ir_His_5, Ir_His_6, Ir_His_7, Ir_His_8])

print(np.mean(Ir_His_atios))
print(np.std(Ir_His_atios))

# Enhancement factor = HisRatioLow/HisRatioHigh
EFs_His = HisRatios[0]/HisRatios[1]
print('His enhancement factor between pH 5.49 and 8.52 is ({:.2f} +/- {:.2f})'.format(np.mean(EFs_His), np.std(EFs_His)/np.sqrt(len(EFs_His))))





''' Arginine '''

ArgRatios = np.zeros([2,3])
Arg_pHs = np.array([9.10, 10.78])
bvs = [170, 50, 350]



# Exp 1

path_lowpH = 'Fig 5 and 7/Arginine/1/pH 9_10 (Scans 8000-11000, exp 24).csv'
path_highpH = 'Fig 5 and 7/Arginine/1/pH 10_78 (Scans 100-3000, exp 24).csv'

rl1, rh1, Ir_Arg_1 = pHRatioAnalysis(path_lowpH, path_highpH, bvs, Arg_pHs)


#  Exp 2

path_lowpH = 'Fig 5 and 7/Arginine/2/pH 9_10 (Scans 100-3000, exp 25).csv'
path_highpH = 'Fig 5 and 7/Arginine/2/pH 10_78 (Scans 0-53000, exp 26).csv'

rl2, rh2, Ir_Arg_2 = pHRatioAnalysis(path_lowpH, path_highpH, bvs, Arg_pHs)


# Exp 3

path_lowpH = 'Fig 5 and 7/Arginine/3/pH 9_10 (Scans 6500-9500, exp 27).csv'
path_highpH = 'Fig 5 and 7/Arginine/3/pH 10_78 (Scans 100-3000, exp 27).csv'

rl3, rh3, Ir_Arg_3 = pHRatioAnalysis(path_lowpH, path_highpH, bvs, Arg_pHs)


ArgRatios = np.array([[rl1,rl2,rl3,],[rh1,rh2,rh3]])

Ir_Arg_ratios = np.array([Ir_Arg_1, Ir_Arg_3,])
EFs_Arg = ArgRatios[0]/ArgRatios[1]

print(np.mean(Ir_Arg_ratios))
print(np.std(Ir_Arg_ratios))


print('Arg enhancement factor between pH 9.10 and 10.78 is ({:.2f} +/- {:.2f})'.format(np.mean(EFs_Arg), np.std(EFs_Arg)/np.sqrt(len(EFs_Arg))))





''' Fitting to model '''

''' Histidine '''

c0_M = 100e-3
c0_Na = 25e-3
pH = np.linspace(0,14,1000)
pKa = 7.59

kon = 5e10
koff = kon / 10**pKa 


D = 8e-10  # estimated diffusion coefficient of histidine based on other amino acids of similar size
I = 5e-12
r0 = 15e-9
theta = np.radians(3)
N_A = 6.022e23

kon = 5e10     
koff = kon/(10**pKa)
print('koff = {}'.format(koff))

qe = 1.6e-19
h = np.sqrt(6*D/koff)
print('h = {}'.format(h))
V = np.pi/(3*np.tan(theta)) * (r0 + h*np.tan(theta))**3 - np.pi*r0**3 / (3*np.tan(theta))
Nsites = c0_M*1e3*V*N_A
kevap = I/(qe*Nsites)
print('His kevap = {}'.format(kevap))

KM = (kevap + koff) / kon
KM = 10**(-pKa)
print('KM = {}'.format(KM))

MH_his = kon*c0_M*10**(-pH)/(koff+kevap+kon*10**(-pH))

v_his = 10**(-pH) / (KM + 10**(-pH))



fig, ax = plt.subplots(figsize = [2.5,3])

ax.plot(pH,v_his, label = '$K_M$ = {:.1E}'.format(KM))

# Measured data
EF = np.mean(EFs_His)
EF_relerr = (np.std(EFs_His)/np.sqrt(len(EFs_His)))/EF


pH0 = 5.49 
v0 = 10**(-pH0) / (KM + 10**(-pH0))
x_his = [5.49, 8.52]
y_his = [v0, v0/EF]
yerr_his = [EF_relerr * v0, EF_relerr * v0/EF]

    
ax.errorbar(x_his,y_his,yerr_his,marker = 'o',ls = 'None', capsize = 5, color = 'k', label = 'arginine data')

ax.set_ylabel('v', fontsize = 10)
ax.set_xlabel('pH', fontsize = 10)
ax.tick_params(labelsize = 8, direction = 'in')

ax.set_yscale('log')
ax.set_xlim(4,12)
ax.set_ylim(3e-3,3e0)
plt.tight_layout()



# calculate total v taking into account limited occupancy
c_max = 100e-3
phi_M = MH_his/(MH_his+c0_Na)
phi_Na = c0_Na/(MH_his+c0_Na)

v_M = c_max * phi_M * kevap 
v_Na = c_max * phi_Na * kevap

fig, ax = plt.subplots(figsize = [3.5,3])
ax.plot(pH, 1e3*v_M/kevap, label = 'His$^+$')
ax.plot(pH, 1e3*v_Na/kevap, label = 'Na$^+$')
ax.plot(x_his[0]*np.ones(100), np.linspace(0,1000,100), 'k--', lw = 1, alpha = 0.5)
ax.plot(x_his[1]*np.ones(100), np.linspace(0,1000,100), 'k--', lw = 1, alpha = 0.5)
ax.set_xlabel('pH')
ax.set_xlim(4,11)
ax.set_ylim(0,105)
ax.set_ylabel('concentration at meniscus (mM)')
ax.legend()
ax.tick_params(direction = 'in')
plt.tight_layout()
# plt.savefig('/Users/ndrach1/Documents/Brown/Research/Manuscripts/Amino Acid Paper/Figures/Amino acid spectra vs pH/Non-normalized figure/His concentration predictions.svg', format = 'svg')
# plt.savefig('/Users/ndrach1/Documents/Brown/Research/Manuscripts/Amino Acid Paper/Figures/Amino acid spectra vs pH/Non-normalized figure/His concentration predictions.pdf')







''' Arginine '''
c0_Na = 50e-3
c0_M = 100e-3
pH = np.linspace(0,14,1000)
pKa = 9.0  # Use pKa of most active group in pH range studied"

D = 6.5e-10
I = 5e-12
r0 = 15e-9
theta = np.radians(4)
N_A = 6.022e23

kon = 5e10
koff = kon*10**-pKa
print('koff = {}'.format(koff))

qe = 1.6e-19
h = np.sqrt(6*D/koff)
print('h = {}'.format(h))
V = np.pi/(3*np.tan(theta)) * (r0 + h*np.tan(theta))**3 - np.pi*r0**3 / (3*np.tan(theta))
Nsites = c0_M*1e3*V*N_A
kevap = I/(qe*Nsites)
print('Arg kevap = {}'.format(kevap))


KM = (kevap + koff) / kon
print('KM = {}'.format(KM))

MH_arg = kon*c0_M*10**(-pH)/(koff+kevap+kon*10**(-pH))


v_arg = 10**(-pH) / (KM + 10**(-pH)) # theoretical curve


fig, ax = plt.subplots(figsize = [2.5,3])

ax.plot(pH,v_arg, label = '$K_M$ = {:.1E}'.format(KM))

# Measured data
EF = np.mean(EFs_Arg)
EF_relerr = (np.std(EFs_Arg)/np.sqrt(len(EFs_Arg)))/EF


dpH = 0
pH0 = 9.10 - dpH
v0 = 10**(-pH0) / (KM + 10**(-pH0))
x_arg = [9.10-dpH, 10.78 - dpH]
y_arg = [v0, v0/EF]
yerr_arg = [EF_relerr * v0, EF_relerr * v0/EF]

ax.errorbar(x_arg,y_arg,yerr_arg,marker = 'o',ls = 'None', capsize = 5, color = 'k', label = 'arginine data')

ax.set_ylabel('v', fontsize = 10)
ax.set_xlabel('pH', fontsize = 10)
ax.tick_params(labelsize = 8, direction = 'in')

ax.set_yscale('log')
ax.set_xlim(4,12)
ax.set_ylim(3e-3,3e0)
plt.tight_layout()



# calculate total v taking into account limited occupancy
c_max = 100e-3
phi_M = MH_arg/(MH_arg+c0_Na)
phi_Na = c0_Na/(MH_arg+c0_Na)

v_M = c_max * phi_M * kevap 
v_Na = c_max * phi_Na * kevap

fig, ax = plt.subplots(figsize = [3.5,3])
ax.plot(pH, 1e3*v_M/kevap, label = 'Arg$^+$')
ax.plot(pH, 1e3*v_Na/kevap, label = 'Na$^+$')
ax.plot(x_arg[0]*np.ones(100), np.linspace(0,1000,100), 'k--', lw = 1, alpha = 0.5)
ax.plot(x_arg[1]*np.ones(100), np.linspace(0,1000,100), 'k--', lw = 1, alpha = 0.5)

ax.set_xlabel('pH')
ax.set_xlim(7,13)
ax.set_ylim(0,105)
ax.set_ylabel('concentration at meniscus (mM)')
ax.legend()
ax.tick_params(direction = 'in')
plt.tight_layout()
# plt.savefig('/Users/ndrach1/Documents/Brown/Research/Manuscripts/Amino Acid Paper/Figures/Amino acid spectra vs pH/Non-normalized figure/Arg concentration predictions.svg', format = 'svg')
# plt.savefig('/Users/ndrach1/Documents/Brown/Research/Manuscripts/Amino Acid Paper/Figures/Amino acid spectra vs pH/Non-normalized figure/Arg concentration predictions.pdf')







